TODOs

  • clean up text in this doc

Overview

IRAP trial-type D scores are calculated from an average of only 18 pairs of reaction times. This would be deemed as far too low anywhere else in the literature on reaction time based tasks. The implications of this can be seen in how poorly estimated any one IRAP D score is. We can observe this by bootstrapping reaction times for each participant’s D scores and noting how wide their confidence intervals are.

# dependencies
library(tidyverse)
library(knitr)
library(kableExtra)
library(boot)
library(parallel)
library(bayestestR)
library(patchwork)
library(mdthemes)
library(lme4)
library(sjPlot)
library(emmeans)
library(ggstance)
library(janitor)
# library(merTools) called via merTools:: to avoid namespace collisions between MASS and dplyr


# set seed for reproducibility
set.seed(42)

# options
options(knitr.table.format = "html") # necessary configuration of tables

# disable scientific notation
options(scipen = 999) 


# function to round all numeric vars in a data frame
round_df <- function(df, n_digits = 3) {
  require(janitor)
  df %>% mutate_if(is.numeric, janitor::round_half_up, digits = n_digits)
}


# create necessary directories
dir.create("../data/processed")
dir.create("../data/results")
#dir.create("models")


# get data 
data_trial_level <- read_csv("../data/raw/data_trial_level.csv") %>%
  filter(timepoint == "baseline" & (age >= 18 | is.na(age)))

# outliers
data_outliers <- data_trial_level %>%
  distinct(unique_id, .keep_all = TRUE) %>%
  select(unique_id, domain, mean_rt) %>%
  mutate(median_mean_rt = median(mean_rt, na.rm = TRUE),
         mad_mean_rt = mad(mean_rt, na.rm = TRUE)) %>%
  # exclude median +- 2MAD
  mutate(rt_outlier = ifelse(mean_rt < median_mean_rt-mad_mean_rt*2 |
                               mean_rt > median_mean_rt+mad_mean_rt*2, TRUE, FALSE)) %>%
  filter(rt_outlier == FALSE) %>%
  select(unique_id, rt_outlier) %>%
  full_join(data_trial_level, by = "unique_id") %>%
  mutate(rt_outlier = ifelse(is.na(rt_outlier), TRUE, rt_outlier))

data_outliers_removed <- data_outliers %>%
  filter(rt_outlier == FALSE)

# trim RTs>10000 ms, as part of D scoring
data_trimmed <- data_outliers_removed %>%
  select(unique_id, domain, trial_type, rt, block_type) %>%
  filter(rt <= 10000)

Sample descriptives

data_outliers %>%
  distinct(unique_id, .keep_all = TRUE) %>%
  count(rt_outlier) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
rt_outlier n
FALSE 1462
TRUE 109
data_descriptives <- data_outliers_removed %>%
  distinct(unique_id, .keep_all = TRUE)

data_descriptives %>%
  count(domain) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
domain n
Body image 21
Clinton-Trump 97
Countries (1) 53
Countries (2) 50
Death (1) 17
Death (2) 20
Death (3) 26
Disgust (1) 37
Disgust (2) 43
Friend-Enemy 98
Gender (1) 41
Gender (2) 95
Lincoln-Hitler 132
Personality - Agreeableness 39
Personality - Conscientiousness 39
Personality - Extraversion 33
Personality - Neuroticism 32
Personality - Openness 33
Race (1) 45
Race (2) 44
Religion 30
Rich-Poor 84
Sexuality (1) 26
Sexuality (2) 19
Shapes & colors (1) 11
Shapes & colors (2) 10
Shapes & colors (3) 10
Shapes & colors (4) 8
Shapes & colors (5) 22
Shapes & colors (6) 26
Shapes & colors (7) 14
Stigma - ADHD 62
Stigma - PTSD 54
Stigma - Schizophrenia 54
Valenced words 37
data_descriptives %>%
  count(domain) %>%
  summarize(total_n = sum(n),
            min_n_per_domain = min(n),
            max_n_per_domain = max(n),
            mean_n_per_domain = round_half_up(mean(n, na.rm = TRUE), 2),
            median_n_per_domain = round_half_up(median(n, na.rm = TRUE), 2),
            sd_n_per_domain = round_half_up(sd(n, na.rm = TRUE), 2)) %>%
  gather() %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
key value
total_n 1462.00
min_n_per_domain 8.00
max_n_per_domain 132.00
mean_n_per_domain 41.77
median_n_per_domain 37.00
sd_n_per_domain 28.90
data_descriptives %>%
  summarize(min_age  = round_half_up(min(age, na.rm = TRUE), 2),
            max_age  = round_half_up(max(age, na.rm = TRUE), 2),
            mean_age = round_half_up(mean(age, na.rm = TRUE), 2),
            sd_age   = round_half_up(sd(age, na.rm = TRUE), 2)) %>%
  gather() %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
key value
min_age 18.00
max_age 60.00
mean_age 20.12
sd_age 4.39
data_descriptives %>%
  count(gender) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
gender n
Female 218
Male 135
Other 1
NA 1108

Distribution of RTs

In order to convey how small these effects are and what they’re estimated from for a given participant.

All RTs

data_trimmed %>%
  mutate(trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
                                trial_type == "tt2" ~ "Trial type 2",
                                trial_type == "tt3" ~ "Trial type 3",
                                trial_type == "tt4" ~ "Trial type 4")) %>%
  ggplot(aes(rt, fill = block_type)) +
  geom_density(alpha = 0.3) +
  facet_wrap(~ trial_type, ncol = 1) +
  mdthemes::md_theme_linedraw() +
  scale_fill_viridis_d(begin = 0.3, end = 0.7) +
  ylab("Frequency") +
  xlab("Reaction time (ms)") +
  labs(fill = "Block type")

# data_trimmed %>%
#   group_by(unique_id, trial_type) %>%
#   summarize(mean_rt_con = mean(rt[block_type == "con"], na.rm = TRUE),
#             mean_rt_incon = mean(rt[block_type == "incon"], na.rm = TRUE),
#             sd_rt_con = sd(rt[block_type == "con"], na.rm = TRUE),
#             sd_rt_incon = sd(rt[block_type == "con"], na.rm = TRUE),
#             sd_rt = sd(rt, na.rm = TRUE),
#             .groups = "drop") %>%
#   mutate(diff_mean_rt = mean_rt_incon - mean_rt_con) %>%
#   select(-unique_id) %>%
#   group_by(trial_type) %>%
#   summarize_all(median) %>%
#   round_df(0) %>%
#   select(trial_type, 
#          median_mean_rt_con = mean_rt_con, 
#          median_mean_rt_incon = mean_rt_incon, 
#          median_diff_mean_rt = diff_mean_rt, 
#          median_sd_rt_con = sd_rt_con, 
#          median_sd_rt_incon = sd_rt_incon, 
#          median_sd_rt = sd_rt) %>%
#   kable() %>%
#   kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Single participant with median D score (D = 0.10)

data_eg_1 <- data_trimmed %>%
  filter(unique_id == "ian death emma_43" & trial_type == "tt1") 
#filter(unique_id == "ian death emma_15" & trial_type == "tt1") 
#filter(unique_id == "ian death june_57" & trial_type == "tt1") 


#dat <- data_trimmed %>%
data_eg_1 %>%  
  group_by(unique_id, trial_type) %>%
  summarize(mean_rt_con = mean(rt[block_type == "con"], na.rm = TRUE),
            mean_rt_incon = mean(rt[block_type == "incon"], na.rm = TRUE),
            sd_rt = sd(rt, na.rm = TRUE)) %>%
  mutate(D = (mean_rt_incon - mean_rt_con)/sd_rt)
## # A tibble: 1 × 6
## # Groups:   unique_id [1]
##   unique_id         trial_type mean_rt_con mean_rt_incon sd_rt      D
##   <chr>             <chr>            <dbl>         <dbl> <dbl>  <dbl>
## 1 ian death emma_43 tt1              1350.         1391.  427. 0.0948
ggplot(data_eg_1, aes(rt, block_type)) +
  geom_point(alpha = 0.5, position = position_jitter(height = 0.2, 
                                                     width = 0,
                                                     seed = 43)) +
  mdthemes::md_theme_linedraw() +
  labs(x = "Reaction time (ms)",
       y = "Block type") 

Bootstrap confidence intervals

Calculate scores

D scores

Calculated by domain and trial-type. 1+ hour runtime on a good PC.

# bootstrapping has a long execution time, so load saved values if they've already been calculated
if(file.exists("../data/processed/data_estimates_D.csv")) {
  
  data_estimates_D <- read_csv("../data/processed/data_estimates_D.csv") %>%
    filter(method == "bca")
  
} else {
  
  D_score <- function(data, i) {
    data_with_indexes <- data[i,] # boot function requires data and index
    mean_con <- mean(data_with_indexes$rt[data_with_indexes$block_type == "con"], na.rm = TRUE)
    mean_incon <- mean(data_with_indexes$rt[data_with_indexes$block_type == "incon"], na.rm = TRUE)
    sd <- sd(data_with_indexes$rt, na.rm = TRUE)
    D <- (mean_incon - mean_con) / sd
    return(D)
  }
  
  bootstrap_D_score <- function(data){
    
    require(dplyr)
    require(boot)
    
    fit <- 
      boot::boot(data      = data, 
                 statistic = D_score, 
                 R         = 5000, 
                 sim       = "ordinary", 
                 stype     = "i",
                 parallel  = "multicore", 
                 ncpus     = parallel::detectCores())
    
    results <- boot::boot.ci(fit, conf = 0.95, type = "bca")
    
    output <- 
      tibble(estimate = rep(fit$t0, 4),
             ci_lower = c(results$normal[2], results$basic[4], results$percent[4], results$bca[4]),
             ci_upper = c(results$normal[3], results$basic[5], results$percent[5], results$bca[5]))
    
    return(output)
  }
  
  # bootstrap D scores 
  data_estimates_D <- data_trimmed %>%
    select(unique_id, domain, trial_type, rt, block_type) %>%
    group_by(unique_id, domain, trial_type) %>%
    do(bootstrap_D_score(data = .)) %>%
    ungroup() %>%
    mutate(sig = ifelse((ci_lower < 0 & ci_upper < 0) | (ci_lower > 0 & ci_upper > 0), TRUE, FALSE),
           ci_width = ci_upper - ci_lower) %>%
    round_df(3)
  
  # save to disk
  write_csv(data_estimates_D, "../data/processed/data_estimates_D.csv")
  
}

PI scores

Calculated by domain and trial-type. 1+ hour runtime on a good PC.

# bootstrapping has a long execution time, so load saved values if they've already been calculated
if(file.exists("../data/processed/data_estimates_PI.csv")) {
  
  data_estimates_PI <- read_csv("../data/processed/data_estimates_PI.csv") %>%
    filter(method == "bca")
  
} else {
  
  # Fast calculation of the A statistic - code from Ruscio (2008) supplementary materials
  PI_score <- function(data, i) {
    data_with_indexes <- data[i,] # boot function requires data and index
    x  <- na.omit(data_with_indexes$rt[data_with_indexes$block_type == "incon"])
    y  <- na.omit(data_with_indexes$rt[data_with_indexes$block_type == "con"])
    nx <- length(x)
    ny <- length(y)
    rx <- sum(rank(c(x, y))[1:nx])
    PI <- (rx / nx - (nx + 1) / 2) / ny
    return(PI)
  }
  
  bootstrap_PI_score <- function(data){
    
    require(dplyr)
    require(boot)
    
    fit <- 
      boot::boot(data      = data, 
                 statistic = PI_score, 
                 R         = 5000, 
                 sim       = "ordinary", 
                 stype     = "i",
                 parallel  = "multicore", 
                 ncpus     = parallel::detectCores())
    
    results <- boot::boot.ci(fit, conf = 0.95, type = "bca")
    
    output <- 
      tibble(estimate = rep(fit$t0, 4),
             ci_lower = c(results$normal[2], results$basic[4], results$percent[4], results$bca[4]),
             ci_upper = c(results$normal[3], results$basic[5], results$percent[5], results$bca[5]))
    
    return(output)
  }
  
  # bootstrap PI scores 
  data_estimates_PI <- data_outliers_removed %>%
    group_by(unique_id, domain, trial_type) %>%
    do(bootstrap_PI_score(data = .)) %>%
    ungroup() %>%
    mutate(sig = ifelse((ci_lower < 0.50 & ci_upper < 0.50) | (ci_lower > 0.50 & ci_upper > 0.50), TRUE, FALSE),
           ci_width = ci_upper - ci_lower) %>%
    round_df(3)
  
  # save to disk
  write_csv(data_estimates_PI, "../data/processed/data_estimates_PI.csv")
  
}

CI widths

Widths cant be directly compared between D and PI as they have different ranges, so D scores only.

Not meta analyzed as extreme skew in data means that residuals are very non-normal, violating assumptions and underestimating MAP estimates. Instead I simply present MAP estimates.

MAP-MAP width

Most probable estimate among the most probable estimates

data_map_ci_widths <- data_estimates_D %>%
  group_by(domain, trial_type) %>%
  do(point_estimate(.$ci_width, centrality = "MAP")) %>%
  ungroup()

data_map_ci_widths %>%
  summarize(map_map = point_estimate(MAP, centrality = "MAP"),
            min_map = min(MAP),
            max_map = max(MAP)) %>%
  unnest(map_map) %>%
  rename(MAP_MAP = MAP) %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
MAP_MAP min_map max_map
1.31 0.75 1.35

MAP CI width

By domain and trial type using basic bootstrapping, on the basis that it has the best performance for % of non-zero D scores (further below).

data_map_ci_widths %>%
  pivot_wider(names_from = trial_type, values_from = MAP) %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
domain tt1 tt2 tt3 tt4
Body image 1.31 1.31 1.34 1.26
Clinton-Trump 1.29 1.31 1.31 1.31
Countries (1) 1.27 1.33 1.29 1.30
Countries (2) 1.32 1.31 1.31 1.31
Death (1) 1.29 1.30 1.15 1.33
Death (2) 1.29 1.29 1.30 1.30
Death (3) 1.25 1.31 1.29 1.32
Disgust (1) 1.12 1.13 1.13 1.13
Disgust (2) 1.28 1.31 1.30 1.24
Friend-Enemy 1.30 1.30 1.32 1.32
Gender (1) 1.32 1.31 1.32 1.31
Gender (2) 1.29 1.31 1.29 1.30
Lincoln-Hitler 0.91 0.92 0.92 1.30
Personality - Agreeableness 1.29 1.31 1.30 1.32
Personality - Conscientiousness 1.30 1.32 1.30 1.30
Personality - Extraversion 1.32 1.29 1.32 1.30
Personality - Neuroticism 1.26 1.32 1.30 1.29
Personality - Openness 1.31 1.31 1.34 1.32
Race (1) 1.29 1.31 1.30 1.30
Race (2) 0.75 0.79 0.79 0.79
Religion 1.23 1.30 1.30 1.30
Rich-Poor 1.31 1.29 1.31 1.32
Sexuality (1) 0.98 1.00 0.99 0.99
Sexuality (2) 1.01 0.90 0.98 1.01
Shapes & colors (1) 1.29 1.30 1.35 1.28
Shapes & colors (2) 1.05 1.31 1.29 1.25
Shapes & colors (3) 1.32 1.31 1.30 1.23
Shapes & colors (4) 1.16 1.32 1.31 1.35
Shapes & colors (5) 1.22 1.30 1.32 1.32
Shapes & colors (6) 1.29 1.29 1.30 1.28
Shapes & colors (7) 0.96 1.26 1.31 1.14
Stigma - ADHD 1.13 1.13 1.13 1.14
Stigma - PTSD 1.13 1.14 1.12 1.12
Stigma - Schizophrenia 1.11 1.13 1.13 1.13
Valenced words 1.21 1.33 1.32 1.31

Plot by domain and trial type

data_ci_width_map_D <- data_estimates_D %>%
  group_by(domain, trial_type) %>%
  #summarize(median = median(ci_width), .groups = "drop") %>%
  do(point_estimate(.$ci_width, centrality = "MAP")) %>%
  ungroup() %>%
  mutate(MAP = round_half_up(MAP, 3),
         trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
                                trial_type == "tt2" ~ "Trial type 2",
                                trial_type == "tt3" ~ "Trial type 3",
                                trial_type == "tt4" ~ "Trial type 4"),
         trial_type = fct_relevel(trial_type, "Trial type 1", "Trial type 2", "Trial type 3", "Trial type 4")) %>%
  #mutate(domain = fct_reorder(domain, MAP, .fun = min)) %>%
  #arrange(domain) %>%
  mutate(domain = fct_rev(domain))

# # save to disk
# write_csv(data_ci_width_map_D, "../data/results/data_ci_width_map_D.csv")

# plot
p_ci_widths <- 
  ggplot(data_ci_width_map_D, aes(MAP, domain)) + 
  geom_point(position = position_dodge(width = 0.8)) +
  mdthemes::md_theme_linedraw() +
  facet_wrap(~ trial_type, ncol = 4, nrow = 1) +
  labs(x = "Highest probability (MAP) 95% CI width",
       y = "") + 
  theme(legend.position = "top")

p_ci_widths

Proportion different from zero

Caterpillar plot

By domain, using normal bootstrapping

p_cis_by_domain <- 
  data_estimates_D %>%
  mutate(domain = str_replace(domain, "Personality - ", "Big5: "),
         domain = str_replace(domain, "Stigma - ", "Stigma: ")) %>%
  arrange(estimate) %>%
  group_by(domain) %>%
  mutate(ordered_id = row_number()/n()) %>%
  ungroup() %>%
  ggplot() +
  geom_linerange(aes(x = ordered_id, ymin = ci_lower, ymax = ci_upper, color = sig),
                 alpha = 1) +
  geom_point(aes(ordered_id, estimate), size = 0.5, shape = "square") +
  geom_hline(yintercept = 0, linetype = "dotted") +
  mdthemes::md_theme_linedraw() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "top") +
  scale_color_viridis_d(end = 0.6, direction = -1) +
  xlab("Ranked participant") +
  ylab("*D* score") +
  labs(color = "95% CI excludes zero point") + 
  facet_wrap(~ domain, ncol = 4)

p_cis_by_domain

Calculate scores

data_diff_zero <- 
  bind_rows(mutate(data_estimates_D, DV_type = "*D* scores"),
            mutate(data_estimates_PI, DV_type = "PI scores")) %>%
  mutate(domain = as.factor(domain),
         trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
                                trial_type == "tt2" ~ "Trial type 2",
                                trial_type == "tt3" ~ "Trial type 3",
                                trial_type == "tt4" ~ "Trial type 4"),
         trial_type = fct_relevel(trial_type, "Trial type 1", "Trial type 2", "Trial type 3", "Trial type 4")) %>%
  group_by(domain, trial_type, DV_type) %>%
  summarize(proportion_diff_zero = mean(sig),
            variance = plotrix::std.error(sig)^2,
            .groups = "drop") %>%
  # model cannot be run on zero variance or 0 or 1 logit, so offset by a minuscule amount
  mutate(proportion_diff_zero_temp = case_when(proportion_diff_zero < 0.001 ~ 0.001, 
                                               proportion_diff_zero > 0.999 ~ 0.999,
                                               TRUE ~ proportion_diff_zero),
         proportion_diff_zero_logit = boot::logit(proportion_diff_zero_temp)) %>%
  select(-proportion_diff_zero_temp) %>%
  filter(!(proportion_diff_zero == 0 & variance == 0)) %>%
  mutate(variance = ifelse(variance == 0, 0.0001, variance)) 

# # save to disk
# write_csv(data_diff_zero, "../data/results/data_diff_zero.csv")

Plot

p_diff_zero <- 
  data_diff_zero %>%
  mutate(domain = fct_rev(factor(domain))) %>%
  ggplot(aes(proportion_diff_zero, domain, color = DV_type, shape = DV_type)) +
  geom_linerangeh(aes(xmin = proportion_diff_zero - sqrt(variance)*1.96,
                      xmax = proportion_diff_zero + sqrt(variance)*1.96),
                  position = position_dodge(width = 0.75)) + 
  geom_point(position = position_dodge(width = 0.75)) +
  scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
  scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
  mdthemes::md_theme_linedraw() +
  facet_wrap(~ trial_type, ncol = 4) +
  labs(x = "Proportion of scores different from zero point",
       y = "",
       color = "Scoring method",
       shape = "Scoring method") + 
  theme(legend.position = "top")

p_diff_zero

Model

# fit model
fit_diff_zero <- 
  lmer(proportion_diff_zero_logit ~ 1 + DV_type + (1 | domain/trial_type),
       weights = 1/variance, 
       data = data_diff_zero)

# extract marginal means
results_emm_diff_zero <- 
  summary(emmeans(fit_diff_zero, ~ DV_type)) %>%
  dplyr::select(DV_type, estimate = emmean, se = SE, ci_lower = lower.CL, ci_upper = upper.CL)

# extract re Tau
results_re_tau_diff_zero <- fit_diff_zero %>%
  merTools::REsdExtract() %>%
  as_tibble(rownames = "trial_type") %>%
  rename(tau = value) 

# combine
results_diff_zero <- results_emm_diff_zero %>%
  mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_diff_zero$tau[2]^2)),  # as in metafor package's implementation of prediction intervals, see metafor::predict.rma.R 
         pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_diff_zero$tau[2]^2))) |>
  select(-se) |>
  mutate_if(is.numeric, boot::inv.logit)

# plot
p_prop_nonzero <- 
  ggplot(results_diff_zero, aes(DV_type, estimate, 
                                        #color = DV_type, 
                                        #shape = DV_type, 
                                        #group = DV_type
  )) +
  geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
  geom_point(position = position_dodge(width = 0.8), size = 2.5) +
  mdthemes::md_theme_linedraw() +
  scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Worse)", "0.25", "0.50", "0.75", "1.00<br/>(Better)")) +
  #scale_color_viridis_d(alpha = 1, begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
  #scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
  labs(x = "",
       y = "Proportion of participants with non-zero scores<br/>") + 
  theme(legend.position = "none") +
  coord_flip(ylim = c(0, 1))

p_prop_nonzero

results_diff_zero %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
DV_type estimate ci_lower ci_upper pi_lower pi_upper
D scores 0.13 0.12 0.15 0.1 0.18
PI scores 0.14 0.12 0.16 0.1 0.18
# tests
data_emms_diff_zero <- emmeans(fit_diff_zero, list(pairwise ~ DV_type), adjust = "holm") 

summary(data_emms_diff_zero)$`pairwise differences of DV_type` %>%
  as.data.frame() %>%
  select(comparison = 1, p.value) %>%
  mutate(p.value = ifelse(p.value < .00001, "< .00001", p.value)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
comparison p.value
(D scores) - PI scores 0.4119893

Proportion different from one another

Within domain and trial type.

Note: Discriminability between a score and zero can be determined using the CI, because zero is a known value and only the score is measured with uncertainty. However, discriminability between two scores must take into account the uncertainty in the estimation of both scores. Weir (2005) argues that such an interval can be estimated by expanding the CIs by sqrt(2). Here I refer to these intervals as Discriminability Intervals (DIs).

Calculate discriminability

Many have argued that the zero point is arbitrary and not a useful reference point. Instead of asking “what proportion of D/PI scores are different from zero?”, we could also ask “what proportion of D/PI scores are different from one another?”

# helper function to apply workflow to each resample
discriminability <- function(data, i) {
  
  data_with_indexes <- data[i,] # boot function requires data and index
  
  estimate <- data_with_indexes$estimate
  di_lower <- data_with_indexes$di_lower 
  di_upper <- data_with_indexes$di_upper
  
  n_estimate <- length(estimate)
  n_di_lower <- length(di_lower)
  n_di_upper <- length(di_upper)
  
  r_estimate <- sum(rank(c(estimate, di_lower))[1:n_estimate])
  r_di_upper <- sum(rank(c(di_upper, estimate))[1:n_di_upper])
  
  prob_estimate_inferior_to_di_lower <- 1 - (r_estimate / n_estimate - (n_estimate + 1) / 2) / n_di_lower
  prob_estimate_superior_to_di_upper <- 1 - (r_di_upper / n_di_upper - (n_di_upper + 1) / 2) / n_estimate
  
  probability_estimates_outside_cis <- (prob_estimate_inferior_to_di_lower + prob_estimate_superior_to_di_upper)
  
  return(probability_estimates_outside_cis)
  
}

bootstrap_discriminability <- function(data){
  
  require(dplyr)
  require(boot)
  
  fit <- 
    boot::boot(data      = data, 
               statistic = discriminability, 
               R         = 5000,
               sim       = "ordinary", 
               stype     = "i",
               parallel  = "multicore", 
               ncpus     = parallel::detectCores())
  
  results <- boot::boot.ci(fit, conf = 0.95, type = "percent") # bca bootstraps throw an error, so use next best
  
  output <-
    tibble(
      estimate = fit$t0,
      ci_lower = results$percent[4],
      ci_upper = results$percent[5]
    )
  
  return(output)
}

D scores

# bootstrapping has a long execution time, so load saved values if they've already been calculated
if(file.exists("../data/results/data_discriminability_D.csv")) {
  
  data_discriminability_D <- read_csv("../data/results/data_discriminability_D.csv") %>%
    filter(method == "bca")
  
} else {
  
  # bootstrap D scores 
  data_discriminability_D <- data_estimates_D |>
    # expand CIs by sqrt(2) to form discriminability intervals
    mutate(di_lower = estimate - (estimate - ci_lower)*sqrt(2),
           di_upper = estimate + (ci_upper - estimate)*sqrt(2)) |>
    select(unique_id, domain, trial_type, estimate, di_upper, di_lower) |>
    group_by(domain, trial_type) |>
    do(bootstrap_discriminability(data = .)) |>
    ungroup() |>
    filter(method == "bca") |>
    rename(proportion_discriminable = estimate) |>
    mutate(variance = ((ci_upper - ci_lower)/(1.96*2))^2,
           variance = ifelse(variance == 0, 0.0001, variance),
           domain = as.factor(domain),
           trial_type = fct_relevel(trial_type, "tt1", "tt2", "tt3", "tt4"),
           DV_type = "*D* scores") |>
    # model cannot be run on zero variance or 0 or 1 logit, so offset by a minuscule amount
    mutate(
      proportion_discriminable_temp = case_when(proportion_discriminable < 0.001 ~ 0.001, 
                                                proportion_discriminable > 0.999 ~ 0.999,
                                                TRUE ~ proportion_discriminable),
      proportion_discriminable_logit = boot::logit(proportion_discriminable_temp)
    ) %>%
    select(-proportion_discriminable_temp) |>
    round_df(4)
  
  # save to disk
  write_csv(data_discriminability_D, "../data/results/data_discriminability_D.csv")
  
}

PI scores

# bootstrapping has a long execution time, so load saved values if they've already been calculated
if(file.exists("../data/results/data_discriminability_PI.csv")) {
  
  data_discriminability_PI <- read_csv("../data/results/data_discriminability_PI.csv") %>%
    filter(method == "bca")
  
} else {
  
  # bootstrap D scores 
  data_discriminability_PI <- data_estimates_PI |>
    # expand CIs by sqrt(2) to form discriminability intervals
    mutate(di_lower = estimate - (estimate - ci_lower)*sqrt(2),
           di_upper = estimate + (ci_upper - estimate)*sqrt(2)) |>
    select(unique_id, domain, trial_type, method, estimate, di_upper, di_lower) |>
    group_by(domain, trial_type) |>
    do(bootstrap_discriminability(data = .)) |>
    ungroup() |>
    filter(method == "bca") |>
    rename(proportion_discriminable = estimate) |>
    mutate(variance = ((ci_upper - ci_lower)/(1.96*2))^2,
           variance = ifelse(variance == 0, 0.0001, variance),
           domain = as.factor(domain),
           trial_type = fct_relevel(trial_type, "tt1", "tt2", "tt3", "tt4"),
           DV_type = "PI scores") |>
    # model cannot be run on zero variance or 0 or 1 logit, so offset by a minuscule amount
    mutate(
      proportion_discriminable_temp = case_when(proportion_discriminable < 0.001 ~ 0.001, 
                                                proportion_discriminable > 0.999 ~ 0.999,
                                                TRUE ~ proportion_discriminable),
      proportion_discriminable_logit = boot::logit(proportion_discriminable_temp)
    ) |>
    select(-proportion_discriminable_temp) |>
    round_df(4)
  
  # save to disk
  write_csv(data_discriminability_PI, "../data/results/data_discriminability_PI.csv")
  
}

Plot

# combine
data_discriminability_combined <- 
  bind_rows(data_discriminability_D,
            data_discriminability_PI) %>%
  mutate(trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
                                trial_type == "tt2" ~ "Trial type 2",   
                                trial_type == "tt3" ~ "Trial type 3",   
                                trial_type == "tt4" ~ "Trial type 4")) %>%
  filter(!(proportion_discriminable == 0 & variance == 0)) %>%
  mutate(variance = ifelse(variance == 0, 0.0001, variance)) 

p_discriminability <- 
  data_discriminability_combined %>%
  mutate(domain = fct_rev(factor(domain))) %>%
  ggplot(aes(proportion_discriminable, domain, color = DV_type, shape = DV_type)) +
  geom_linerangeh(aes(xmin = proportion_discriminable - sqrt(variance)*1.96,
                      xmax = proportion_discriminable + sqrt(variance)*1.96),
                  position = position_dodge(width = 0.75)) + 
  geom_point(position = position_dodge(width = 0.75)) +
  scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
  scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
  mdthemes::md_theme_linedraw() +
  facet_wrap(~ trial_type, ncol = 4) +
  labs(x =  "Proportion of participants who whose scores<br/>differ from one another<br/>",
       y = "",
       color = "Scoring method",
       shape = "Scoring method") + 
  theme(legend.position = "top")

p_discriminability

Model

# fit meta analytic model
fit_disciminability <- 
  lmer(proportion_discriminable_logit ~ 1 + DV_type + (1 | domain/trial_type), 
       weights = 1/variance, 
       data = data_discriminability_combined)

# extract marginal means
results_emm_disciminability <-
  summary(emmeans(fit_disciminability, ~ DV_type)) %>%
  dplyr::select(DV_type, estimate = emmean, se = SE, ci_lower = lower.CL, ci_upper = upper.CL) 

# extract re Tau
results_re_tau_disciminability <- fit_disciminability %>%
  merTools::REsdExtract() %>%
  as_tibble(rownames = "trial_type") %>%
  rename(tau = value) 

# combine
results_disciminability <- results_emm_disciminability %>%
  mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_disciminability$tau[2]^2)),  # as in metafor package's implementation of credibility intervals, see metafor::predict.rma.R
         pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_disciminability$tau[2]^2))) |>
  select(-se) |>
  mutate_if(is.numeric, boot::inv.logit)

# plot
p_prop_discriminable <-
  ggplot(results_disciminability, aes(DV_type, estimate, 
                                              #color = DV_type, 
                                              #shape = DV_type, 
                                              #group = DV_type
  )) +
  geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
  geom_point(position = position_dodge(width = 0.8), size = 2.5) +
  scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Worse)", "0.25", "0.50", "0.75", "1.00<br/>(Better)")) +
  #scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
  #scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
  mdthemes::md_theme_linedraw() +
  labs(x = "",
       y = "Proportion of participants who whose scores<br/>differ from one another<br/>") +
  theme(legend.position = "none") +
  coord_flip(ylim = c(0, 1))

p_prop_discriminable 

results_disciminability %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
DV_type estimate ci_lower ci_upper pi_lower pi_upper
D scores 0.06 0.04 0.08 0.01 0.26
PI scores 0.05 0.04 0.07 0.01 0.22
# tests
data_emms_disciminability <- emmeans(fit_disciminability, list(pairwise ~ DV_type), adjust = "holm") 

summary(data_emms_disciminability)$`pairwise differences of DV_type` %>%
  as.data.frame() %>%
  select(comparison = 1, p.value) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
comparison p.value
(D scores) - PI scores 0.0046591

CI widths as a proportion of observed range

NB observed range of confidence intervals

Calculate scores

## calculate observed ranges 
observed_range_estimates_D <- data_estimates_D %>%
  group_by(domain, trial_type) %>%
  dplyr::summarize(min = min(ci_lower, na.rm = TRUE),
                   max = max(ci_upper, na.rm = TRUE),
                   .groups = "drop") %>%
  mutate(range = max - min) 

observed_range_estimates_PI <- data_estimates_PI %>%
  group_by(domain, trial_type) %>%
  dplyr::summarize(min = min(ci_lower, na.rm = TRUE),
                   max = max(ci_upper, na.rm = TRUE),
                   .groups = "drop") %>%
  mutate(range = max - min) 


# calculate CI / range 
data_ci_width_proportions_D <- data_estimates_D %>%
  # join this data into the original data
  full_join(observed_range_estimates_D, by = c("domain", "trial_type")) %>%
  # calculate ci width as a proportion of observed range
  mutate(ci_width_proportion = ci_width / range) %>%
  mutate(DV_type = "*D* scores") 

data_ci_width_proportions_PI <- data_estimates_PI %>%
  # join this data into the original data
  full_join(observed_range_estimates_PI, by = c("domain", "trial_type")) %>%
  # calculate ci width as a proportion of observed range
  mutate(ci_width_proportion = ci_width / range) %>%
  mutate(DV_type = "PI scores")

# combine
data_ci_width_proportions_combined <- 
  bind_rows(data_ci_width_proportions_D,
            data_ci_width_proportions_PI) %>%
  mutate(domain = as.factor(domain),
         trial_type = fct_relevel(trial_type, "tt1", "tt2", "tt3", "tt4")) %>%
  # logit transform
  mutate(ci_width_proportion_temp = case_when(ci_width_proportion < 0.0001 ~ 0.0001,
                                              ci_width_proportion > 0.9999 ~ 0.9999,
                                              TRUE ~ ci_width_proportion),
         ci_width_proportion_logit = boot::logit(ci_width_proportion_temp)) %>%
  select(-ci_width_proportion_temp)

Model

# fit model
fit_ci_width_proportions <- 
  lmer(ci_width_proportion_logit ~ 1 + DV_type + (1 | domain/trial_type), 
       data = data_ci_width_proportions_combined)

# extract marginal means
results_emm_ci_width_proportions <-
  summary(emmeans(fit_ci_width_proportions, ~ DV_type)) %>%
  dplyr::select(DV_type, estimate = emmean, se = SE, ci_lower = asymp.LCL, ci_upper = asymp.UCL)

# extract re Tau
results_re_tau_ci_width_proportions <-
  merTools::REsdExtract(fit_ci_width_proportions) %>%
  as_tibble(rownames = "trial_type") %>%
  rename(tau = value)

# combine
results_ci_width_proportions <- results_emm_ci_width_proportions %>%
  mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_ci_width_proportions$tau[2]^2)),  # as in metafor package's implementation of credibility intervals, see metafor::predict.rma.R
         pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_ci_width_proportions$tau[2]^2))) %>%
  select(-se) %>%
  mutate_if(is.numeric, boot::inv.logit)

# plot
p_ci_width_proportion_observed_range <-
  ggplot(results_ci_width_proportions, aes(DV_type, estimate, 
                                                   #color = DV_type, 
                                                   #shape = DV_type, 
                                                   #group = DV_type
  )) +
  geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
  geom_point(position = position_dodge(width = 0.8), size = 2.5) +
  scale_shape_discrete(labels = c("*D* scores", "PI scores")) +
  scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Better)", "0.25", "0.50", "0.75", "1.00<br/>(Worse)")) +
  #scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
  #scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
  mdthemes::md_theme_linedraw() +
  labs(x = "",
       y = "Proportion of observed range covered <br/>by individual participants' 95% CIs") +
  theme(legend.position = "none") +
  coord_flip(ylim = c(0, 1))

p_ci_width_proportion_observed_range

results_ci_width_proportions %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
DV_type estimate ci_lower ci_upper pi_lower pi_upper
D scores 0.51 0.50 0.53 0.42 0.61
PI scores 0.49 0.47 0.51 0.39 0.59
# tests
data_emms_ci_width_proportions <- emmeans(fit_ci_width_proportions, list(pairwise ~ DV_type), adjust = "holm") 

summary(data_emms_ci_width_proportions)$`pairwise differences of DV_type` %>%
  as.data.frame() %>%
  select(comparison = 1, p.value) %>%
  mutate(p.value = ifelse(p.value < .00001, "< .00001", p.value)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
comparison p.value
(D scores) - PI scores < .00001

Combined plots

Plot 1

Plot 1 is merely illustrative. It shows the bootstrapped CIs for all participants, split by domain, but not splitting by trial type.

p_cis_by_domain

ggsave(filename  = "plots/figure_1_cis_by_domain.pdf",
       plot      = p_cis_by_domain,
       device    = "pdf",
       # path      = NULL,
       # dpi       = 300,
       units     = "in",
       width     = 8,
       height    = 12,
       limitsize = TRUE)

Plot 2

Most probable CI width for D scores when bootstrapped using four different methods. Very similar results are found across methods. Overall Maximum A-Posteroi width (MAP, i.e., the mode of a continuous variable) was D = 1.31. Some domains and trial types did demonstrate smaller most probable widths.

NB I elected not to meta-analyze these widths as they demonstrate very large skew at the individual level, which violate the assumptions of linear meta-analysis and underestimate the typical width (ie estimated mean widths << MAP observed widths). Rather than meta analyze, I simply report the domain and trial type level MAP values. More informative and valid analyses are presented below - ones which can directly compare the D and PI as an alternative. That could not be accomplished with a direct comparison with D/PI scores’ 95% CIs as they are on different scales and follow different distributions.

p_ci_widths

ggsave(filename  = "plots/figure_2_ci_widths.pdf",
       plot      = p_ci_widths,
       device    = "pdf",
       # path      = NULL,
       # dpi       = 300,
       units     = "in",
       width     = 8,
       height    = 6,
       limitsize = TRUE)

Plot 3

The results of three hierarchical/meta analytic models are presented below, all of which provide information via different methods regarding how informative an individual participants’ D (or PI) score is in terms of being able to state that they demonstrated evidence of a bias/IRAP effect/implicit attitude, whether that individual can be discriminated from other individuals in the same domain using the same trial type, and how much of the range of observed scores an individuals score’s CI spans.

  1. a meta-analysis of the proportion of participants that show non-zero scores (i.e., whose D or PI scores’ 95% CIs exclude zero), (2) the proportion of scores that are discriminable from one another. Pairwise comparisons are made between all participants’ scores within a domain and trial type, and the proportion that lie outside one another’s CIs can be inferred to be different from one another (i.e., are discriminable as different from one another). This analysis is useful because it avoids the issue or debate around how meaningful a score’s zero point is (i.e., D = 0, PI = 0.50) or whether it is a meaningful reference point. That is, previous research has argued that D = 0 cannot be inferred to represent no bias. Discriminability is agnostic to any individual comparison point and avoids this issue. (3) A meta analysis of the ratio between each participants’ score and the maximium observed range of scores for that domain and trial type. I.e., given the observed range of scores across participants, what proportion of that range did each individual participant’s score’s Confidence Interval span. If each individual’s CIs are compatible with a large proportion of the total range of all observed socres, then each score is so poorly estimated as to tell us very little about where on the spectrum each participant lies.

All meta analyses compare D and PI scores to assess whether the results are dependent on the D algorithm which has been argued to be suboptimal. That is, I assess whether this issue can be trivially resolved by scoring the data a different way.

Note that the theoretical max possible range of D scores is -2 to +2, but such extreme scores are practically impossible. As such, in order to understand the CI width in terms of realistic data - and also in order to compare D and PI which are on different scales and distributions - I standardize CI widths by the observed range of scores for each domain and trial type.

p_combined <- 
  p_prop_nonzero + 
  p_prop_discriminable + 
  p_ci_width_proportion_observed_range +
  plot_layout(ncol = 1) #, guides = "collect") & theme(legend.position = "top")

p_combined

ggsave(filename  = "plots/figure_3_metaanalyses bca.pdf",
       plot      = p_combined,
       device    = "pdf",
       # path      = NULL,
       # dpi       = 300,
       units     = "in",
       width     = 5,
       height    = 5,
       limitsize = TRUE)

Session info

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_IE.UTF-8/en_IE.UTF-8/en_IE.UTF-8/C/en_IE.UTF-8/en_IE.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] janitor_2.1.0     ggstance_0.3.5    emmeans_1.7.3     sjPlot_2.8.10    
##  [5] lme4_1.1-29       Matrix_1.4-1      mdthemes_0.1.0    patchwork_1.1.1  
##  [9] bayestestR_0.12.1 boot_1.3-28       kableExtra_1.3.4  knitr_1.39       
## [13] forcats_0.5.1     stringr_1.4.0     dplyr_1.0.9       purrr_0.3.4      
## [17] readr_2.1.2       tidyr_1.2.0       tibble_3.1.8      ggplot2_3.3.6    
## [21] tidyverse_1.3.1  
## 
## loaded via a namespace (and not attached):
##   [1] TH.data_1.1-1       minqa_1.2.4         colorspace_2.0-3   
##   [4] ellipsis_0.3.2      sjlabelled_1.2.0    estimability_1.3   
##   [7] snakecase_0.11.0    markdown_1.1        parameters_0.18.1  
##  [10] fs_1.5.2            gridtext_0.1.4      ggtext_0.1.1       
##  [13] rstudioapi_0.13     listenv_0.8.0       furrr_0.3.0        
##  [16] farver_2.1.1        bit64_4.0.5         fansi_1.0.3        
##  [19] mvtnorm_1.1-3       lubridate_1.8.0     xml2_1.3.3         
##  [22] codetools_0.2-18    splines_4.2.0       sjmisc_2.8.9       
##  [25] jsonlite_1.8.0      nloptr_2.0.3        ggeffects_1.1.2    
##  [28] pbkrtest_0.5.1      broom_0.8.0         dbplyr_2.1.1       
##  [31] broom.mixed_0.2.9.4 shiny_1.7.1         effectsize_0.6.0.1 
##  [34] compiler_4.2.0      httr_1.4.3          sjstats_0.18.1     
##  [37] backports_1.4.1     assertthat_0.2.1    fastmap_1.1.0      
##  [40] cli_3.3.0           later_1.3.0         htmltools_0.5.2    
##  [43] tools_4.2.0         coda_0.19-4         gtable_0.3.0       
##  [46] glue_1.6.2          merTools_0.5.2      Rcpp_1.0.9         
##  [49] cellranger_1.1.0    jquerylib_0.1.4     vctrs_0.4.1        
##  [52] svglite_2.1.0       nlme_3.1-157        iterators_1.0.14   
##  [55] insight_0.18.0      xfun_0.31           globals_0.14.0     
##  [58] rvest_1.0.2         mime_0.12           lifecycle_1.0.1    
##  [61] future_1.25.0       MASS_7.3-56         zoo_1.8-10         
##  [64] scales_1.2.0        vroom_1.5.7         promises_1.2.0.1   
##  [67] hms_1.1.1           sandwich_3.0-1      yaml_2.3.5         
##  [70] sass_0.4.1          stringi_1.7.8       highr_0.9          
##  [73] foreach_1.5.2       plotrix_3.8-2       blme_1.0-5         
##  [76] rlang_1.0.4         pkgconfig_2.0.3     systemfonts_1.0.4  
##  [79] arm_1.12-2          evaluate_0.15       lattice_0.20-45    
##  [82] labeling_0.4.2      bit_4.0.4           tidyselect_1.1.2   
##  [85] parallelly_1.31.1   magrittr_2.0.3      R6_2.5.1           
##  [88] generics_0.1.2      multcomp_1.4-19     DBI_1.1.2          
##  [91] pillar_1.8.0        haven_2.5.0         withr_2.5.0        
##  [94] abind_1.4-5         survival_3.3-1      datawizard_0.4.1   
##  [97] performance_0.9.1   modelr_0.1.8        crayon_1.5.1       
## [100] utf8_1.2.2          tzdb_0.3.0          rmarkdown_2.14     
## [103] grid_4.2.0          readxl_1.4.0        reprex_2.0.1       
## [106] digest_0.6.29       webshot_0.5.3       xtable_1.8-4       
## [109] httpuv_1.6.5        munsell_0.5.0       viridisLite_0.4.0  
## [112] bslib_0.3.1